
lulc_dir = file.path(wkdir,'local','gis_data','007_environment','landcover','ESACCI-LC')
xfun::dir_create(lulc_dir)
tmpdir = "C:/temp"

##
## IMPORTANT PATH to Rtools to run Cpp
##
Sys.setenv(PATH = paste("C://WBG//Utility//Rtools//bin", Sys.getenv("PATH"), sep=";"))
Sys.setenv(BINPREF = "C://WBG//Utility//Rtools//mingw_$(WIN)//bin//")

require(Rcpp)
output_version <- '2025-01-28'

rasterOptions(tmpdir = "C:/temp")

ntl.zones <- read_sf(dsn=paste0(wkdir,'/local/gis_data/003_boundaries/fishnet'),layer='fishnet_lake_chad_ntl_adm0_x_d01')
summary(ntl.zones$OBJECTID)
st_crs(ntl.zones)<-4326
ntl.extent <- extent(as(ntl.zones,"Spatial"))

ntl.zones.prj <- ntl.zones %>%
  # 102022
  st_transform(crs="+proj=aea +lat_1=20 +lat_2=-23 +lat_0=0 +lon_0=25 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m no_defs") %>% 
  st_centroid() %>% 
  st_geometry()

M.ntl.zones <- ntl.zones.prj %>% 
  st_transform(4326) %>%
  st_coordinates() 

# convert polygons to points 
# need to check border cases -> INSIDE or NOT? #
ntl.zones.pts = st_as_sf(as.data.frame(M.ntl.zones), coords = c("X", "Y"), crs = 4326)
ntl.zones = st_drop_geometry(subset(ntl.zones,select=c(OBJECTID, GID_0)))
ntl.zones.pts = dplyr::bind_cols(ntl.zones.pts,ntl.zones)

ntl.zones.pts = subset(ntl.zones.pts, OBJECTID %in% c(831,1476,1856,3191,3302,4005,4808,4846,6378,6880,7191,7436,7792,8071,9940,10290,
                                                      10558,11139,12057,12527,12528,12643,12728,13337,14472,15118,15283,18375,19148,
                                                      19604,20175,21076,21890,27404,28842,32478))

spntlpt <- as(ntl.zones.pts,"Spatial")

##
## SELECT CROPS FROM FAO GIEWS REPORTS (except groundnut)
##
ntl.zones <- read_sf(dsn=paste0(wkdir,'/local/gis_data/003_boundaries/fishnet'),layer='fishnet_lake_chad_ntl_adm0_x_d01')
ntl.zones <- ntl.zones %>% st_set_crs(4326)
ntl.zones.only <- subset(ntl.zones,select=c("OBJECTID"))

##
## class 10 = cropland
## class 20 = cropland / irrigated
# set up the cluster object for parallel computing


lulc9215 <- stack(paste0(lulc_dir,'/ESACCI-LC-L4-LCCS-Map-300m-P1Y-1992_2015-v2.0.7.tif'))
names(lulc9215)<- paste0("lulc",1992:2015)
lulc16 <- stack(paste0(lulc_dir,'/C3S-LC-L4-LCCS-Map-300m-P1Y-2016-v2.1.1.tif'))
lulc17 <- raster(paste0(lulc_dir,'/C3S-LC-L4-LCCS-Map-300m-P1Y-2017-v2.1.1.nc'),varname='lccs_class')
lulc18 <- raster(paste0(lulc_dir,'/C3S-LC-L4-LCCS-Map-300m-P1Y-2018-v2.1.1.nc'),varname='lccs_class')
lulc19 <- raster(paste0(lulc_dir,'/C3S-LC-L4-LCCS-Map-300m-P1Y-2019-v2.1.1.nc'),varname='lccs_class')
lulcglobal <- stack(lulc9215,lulc16,lulc17,lulc18,lulc19)
names(lulcglobal)

# manually set outside for 5x5
## first row: xmin, xmax; second row: ymin, ymax
ntl.extent <- c(0.1,24.1,1.6,23.6)
names(lulcglobal) <- paste0("lulc",1992:2019)

terra_lulcglobal <- terra::rast(lulcglobal)
lulc <- terra::crop(terra_lulcglobal,ntl.extent)

# reclass matrix
# 0 = no data
# 1 = cropland
# 2 = cropland mosaic
# 3 = grassland (130)
# 4 = urban
# 5 = bare 
# 6 = other
# 7 = water
# 8 = Cropland, irrigated or post-flooding
m <- c(0,0,1,10,1,11,1,12,8,20,2,30,6,40,6,50,6,60,6,61,6,62,6,70,6,71,6,72,6,80,6,81,6,
       82,6,90,6,100,6,110,6,120,6,121,6,122,3,130,6,140,6,150,6,151,6,152,6,153,6,160,6,170,
       6,180,4,190,5,200,5,201,5,202,7,210,5,220)
rclmat <- matrix(m, ncol=2, byrow=TRUE)
# wrong order fix later #
rclmat <- cbind(rclmat[,2],rclmat[,1])
system.time(rclulc <- terra::classify(lulc, rclmat))
names(rclulc) <- paste0("lulcre",1992:2019)

terra::writeRaster(rclulc, 
            filename = file.path(tmpdir, paste0(names(rclulc), '.tif')), 
            overwrite = TRUE, 
            filetype = "GTiff")

# 
spntl <- as(ntl.zones.only,"Spatial")
n_spntlobjid <- length(unique(spntl@data$OBJECTID))

lulc_tmpdir <- file.path(wkdir,'tmpdir')
xfun::dir_create(lulc_tmpdir)
rclulc_tif_files <- list.files(lulc_tmpdir,"lulcre*",full.names=TRUE)
spntl_sf <- spntl %>% sf::st_as_sf()

# Function to extract raster values and count unique lulc values
extract_all_values <- function(raster_path, spatial_data) {
  
  # Load the raster data
  rclulc_i <- rast(raster_path)
  
  # Extract all raster values (no summary statistics)
  spatial_data2 <- exactextractr::exact_extract(rclulc_i, spntl_sf,
                                                coverage_area = T,
                                                include_cell=T,include_cols='OBJECTID')
  
  spatial_data2_tibble <- tibble::tibble(value = spatial_data2)
  
  # Now spatial_data2_tibble is a tibble with a list-column 'value' containing the raster values.
  # Unnest the list-column and count unique values
  spatial_data_flat <- spatial_data2_tibble %>%
    tidyr::unnest(cols = value) %>%  # Flatten the list-column 'value'
    dplyr::rename(lulc = value)
    
  zDT = as.data.table(spatial_data_flat)
  
  # Step 1: Sum coverage_area by OBJECTID and lulc
  zDT_sum_lulc <- zDT[, .(total_coverage_area_lulc = sum(coverage_area)), 
                      by = .(OBJECTID, lulc)]
  
  # Step 2: Sum coverage_area by OBJECTID (total for the OBJECTID)
  zDT_sum_objectid <- zDT[, .(total_coverage_area_objectid = sum(coverage_area)), 
                          by = OBJECTID]
  
  # Step 3: Merge summed coverage area for OBJECTID back into zDT_sum_lulc
  zDT_sum_lulc <- merge(zDT_sum_lulc, zDT_sum_objectid, by = "OBJECTID")
  
  # Step 4: Calculate the share (percentage) of coverage_area for each lulc
  zDT_sum_lulc[, sh_lulc := (total_coverage_area_lulc / total_coverage_area_objectid)]
  
  # Step 5: Drop the columns total_coverage_area_lulc and total_coverage_area_objectid
  zDT_sum_lulc[, c("total_coverage_area_lulc", "total_coverage_area_objectid") := NULL]
  
  zDF <- as.data.frame(zDT_sum_lulc)
  
  return(zDF)
}

  
#
for (rclulc_i in rclulc_tif_files) {
  yyyy <- substring(basename(rclulc_i),7,10)
  out_fn <- paste0(wkdir,"/local/gis_data/LULCfreq/LULCfreq",yyyy,"_",output_version,".dta")
  xfun::dir_create(dirname(out_fn))
  file.remove(out_fn)
  if(!(file.exists(out_fn))){
    print(paste0("processing into stata",yyyy))
    # 33252
    # Call the function to extract all raster values and count unique lulc values
    out.df <- extract_all_values(rclulc_i, spntl_sf)
    out.df <- out.df %>% rename(objectid = OBJECTID)
    out.df <- out.df %>%
      dplyr::mutate(year = yyyy)
    attr(out.df$objectid, "label") <- "objectid of grid"
    attr(out.df$year, "label") <- "year"
    attr(out.df$sh_lulc, "label") <- 'LULC share of area'
    # Assign value labels to 'lulc' (mapping numeric values to meaningful categories)
    # 0 = no data
    # 1 = cropland
    # 2 = cropland mosaic
    # 3 = grassland (130)
    # 4 = urban
    # 5 = bare 
    # 6 = other
    # 7 = water
    # 8 = Cropland, irrigated or post-flooding
    out.df$lulc <- haven::labelled(out.df$lulc, labels = c("No data" = 0, 
                                                           "Cropland" = 1, 
                                                           "Cropland mosaic" = 2,
                                                           "grassland (130)" = 3,
                                                           "urban" = 4,
                                                           "bare" = 5,
                                                           "other" = 6,
                                                           "water" = 7,
                                                           "Cropland, irrigated or post-flooding" = 8
    ), label = 'Land Use / Land Cover : ESA')
    attr(out.df, "label") <- sprintf("*landcover_reclass_crosstab*.R last run on %s",Sys.Date())
    ## Save the data
    haven::write_dta(out.df, out_fn)
    
  }
}
